home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2004 July
/
CMCD0704.ISO
/
Software
/
Shareware
/
Comunicatii
/
jaeger
/
jaeger.exe
/
{app}
/
Tools
/
ToolGoogleAlerts.py
< prev
next >
Wrap
Text File
|
2004-04-03
|
13KB
|
441 lines
#!/bin/python
"""
ToolGoogleAlerts.py
David Janes
BlogMatrix
2004.03.30
"""
import cStringIO
import os
import poplib
import pprint
import re
import sys
import time
import traceback
import urllib
import xml.sax.saxutils

import Tool
#
# Patterns used to pick Google News Alert e-mails apart.
#
# All patterns are raw strings so that regex escapes reach the `re` module
# untouched, and the dots in "www.google.com" are escaped so that only that
# literal host matches (an unescaped "." would also accept look-alike hosts
# such as "wwwXgoogle.com").
#

# One story inside an alert body: a title line (group 1), free text
# (group 2), then a line consisting solely of "<url>" (group 3).
block_re = r"""([^\s][^\n]*)$(.*?)^<([^\n]*)>$"""
block_rex = re.compile(block_re, re.I|re.MULTILINE|re.DOTALL)

# The unsubscribe link of an alert e-mail (URL is group 2).
remove_re = r"""^Remove this News Alert:(.*?)(http://www\.google\.com/newsalerts[^\s]*?)$"""
remove_rex = re.compile(remove_re, re.I|re.MULTILINE|re.DOTALL)

# Subject line of the "please verify" e-mail.
confirm_re = r"""News Alerts.*Verification Email"""
confirm_rex = re.compile(confirm_re, re.I|re.MULTILINE|re.DOTALL)

# The confirmation link inside a verification e-mail (URL is group 2).
verify_re = r"""^Verify this News Alert request:(.*?)(http://www\.google\.com/newsalerts[^\s]*?)$"""
verify_rex = re.compile(verify_re, re.I|re.MULTILINE|re.DOTALL)
class ToolGoogleAlerts(Tool.ToolInterface):
"""
The interface for your tool. Simply create an instance of this object and Jaeger
will figure it out.
"""
def __init__(self):
Tool.ToolInterface.__init__(self, self.WEBSERVER)
def get_label(self, selected):
return "Google News Alerts"
def get_server(self, path):
"""
"""
return {
"/": ( self.serve_root, "" ),
"/setup": ( self.serve_setup, "Setup Google News Alerts" ),
"/alerts/feed": ( self.serve_feed, "Feed" ),
}.get(path)
def serve_root(self, operations, path, valuemap):
result = [
self.text_standard_header(path),
"""
<h2>Google News Alerts</h2>
<p class="first">
This extension lets you convert Google News Alerts arriving by e-mail
into syndication feeds for Jäger.
<ul>
<li>
<a href="setup">Setup Google News Alerts</a> (do this once)
<li>
Create new alerts <b><a target=_blank href="http://www.google.com/newsalerts?hl=en">using Google</a></b>.
</ul>
""",
"""<h3>Your Alerts</h3>
<p class="first">
Subscribe to Alerts by dragging the <img align=bottom src="/images/xml.gif"> button to Jäger's window.
""",
]
#
# subscriptions
#
all_alerts = operations.get_userdata('alerts', None)
if not all_alerts:
all_alerts = {}
operations.set_userdata('alerts', all_alerts)
blogs = operations.get_weblogs()
blog_urls = map(lambda b : b.get("url", ""), blogs)
subscriptions = []
for subject, ( alert_list, alert_map ) in all_alerts.iteritems():
google_url = "http://news.google.com/news?hl=en&q=%s" % urllib.quote(subject)
feed_url = "alerts/feed?subject=%s" % urllib.quote(subject)
subscriptions.append(( subject, google_url, feed_url, alert_map.get("remove"), google_url in blog_urls ))
subscriptions.sort()
for is_subscribed in [ True, False ]:
if is_subscribed: result.append("<h4>Alerts that are subscribed to</h4><ul>")
else: result.append("<h4>Alerts that are NOT subscribed to</h4><ul>")
for subject, google_url, feed_url, cancel_url, subscribed in subscriptions:
if subscribed != is_subscribed: continue
result.append("""<li><a href=%s><img align=bottom border=0 src="/images/xml.gif"></a> <a target=_blank href=%s>%s</a> [<a target=_blank href=%s>Cancel</a>]""" % ( \
self.quote_attribute(feed_url),
self.quote_attribute(google_url),
self.escape_html(subject),
self.quote_attribute(cancel_url),
))
result.append("</ul>")
result.append(self.text_standard_footer())
return 200, "text/html", None, result
def serve_setup(self, operations, path, valuemap):
is_form = valuemap.get("_form")
hostname = valuemap.get("hostname")
if hostname == None:
hostname = operations.get_userdata("hostname", "")
else:
operations.set_userdata("hostname", hostname)
account = valuemap.get("account")
if account == None:
account = operations.get_userdata("account", "")
else:
operations.set_userdata("account", account)
password = valuemap.get("password")
if password == None:
password = operations.get_userdata("password", "")
else:
operations.set_userdata("password", password)
enabled = valuemap.get("enabled", "")
if not is_form:
enabled = operations.get_userdata("enabled", "")
else:
operations.set_userdata("enabled", enabled)
dodelete = valuemap.get("delete", "")
if not is_form:
dodelete = operations.get_userdata("delete", "")
else:
operations.set_userdata("delete", dodelete)
autosubscribe = valuemap.get("autosubscribe", "")
if not is_form:
autosubscribe = operations.get_userdata("autosubscribe", "on")
else:
operations.set_userdata("autosubscribe", autosubscribe)
result = [
self.text_standard_header(path),
"""
<h2>Setup Google News Alerts</h2>
<p class="first">
""",
"""\
<form method="POST">
<h3>Your E-Mail Address</h3>
<p class="first">
This is the e-mail address that you are telling Google News to send alerts to.
<p>
<table>
<tr>
<td align="left" width=120>E-Mail Address:</td>
<td></td>
<td><input type="text" name="account" value=\"""" + self.escape_html(account) + """"></td>
</tr>
</table>
<h3>E-Mail Account Information</h3>
<h4>POP3 Information</h4>
<p class="first">
If you receive your e-mail using a POP3 mail account,
fill in the information here.
We recommend that you create a seperate POP3 account for Google
News Alerts, but if you can't (or don't know how to) do this, it's OK.
<p>
<table>
<tr>
<td align="left" width=120>Hostname:</td>
<td> </td>
<td><input type="text" name="hostname" value=\"""" + self.escape_html(hostname) + """"></td>
</tr>
<tr>
<td align="left">Password:</td>
<td></td>
<td><input type="password" name="password" value=\"""" + self.escape_html(password) + """"></td>
</tr>
</table>
<h4>Outlook Information</h4>
<p class="first">
If you receive your e-mail using Outlook (<b>not</b> Outlook Express),
fill in the information here. <i>Coming soon</i>.
<h3>Options</h3>
<p class="first">
This extension will not run until you click the "Enable" button.
If you select "Auto-subscribe",
you will be prompted as soon as any new Google News Alert e-mails are discovered
(i.e. you may be doing something else entirely!)
<p>
<table>
<tr>
<td align="left" width=120>Enable:</td>
<td></td>
<td><input type="checkbox" name="enabled" """ + ( enabled and "checked" or "" ) + """></td>
</tr>
<tr>
<td align="left">Auto-subscribe:</td>
<td></td>
<td><input type="checkbox" name="autosubscribe" """ + ( autosubscribe and "checked" or "" ) + """></td>
</tr>
<tr>
<td></td>
<td></td>
<td><input type="Submit" name="submit" value="Save"></td>
</tr>
</table>
""" ]
result.append(self.text_standard_footer())
return 200, "text/html", None, result
def serve_feed(self, operations, path, valuemap):
subject_alert = valuemap.get("subject", "")
if not subject_alert:
return 404, "text/html", self.text_filenotfound()
print >> sys.stderr, "serve_feed: subject='%s'" % subject_alert
all_alerts = operations.get_userdata('alerts', None)
if not all_alerts:
all_alerts = {}
operations.set_userdata('alerts', all_alerts)
# pprint.pprint(all_alerts.keys())
alert = all_alerts.get(subject_alert)
if not alert:
return 404, "text/html", self.text_filenotfound()
alert_list, alert_map = alert
#
# output the RSS
#
result = []
result.append('<?xml version="1.0" encoding="iso-8859-1"?>')
result.append('<rss version="0.92">')
result.append('<channel>')
result.append("<title>Google News Alerts for %s</title>" % \
xml.sax.saxutils.escape(subject_alert))
result.append("<link>http://news.google.com/news?hl=en&q=%s</link>" % \
urllib.quote(subject_alert))
for x in range(len(alert_list), 0, -1):
itime, iurl, ititle, imap = alert_list[x - 1]
ituple = time.localtime(itime)
result.append(' <item>')
result.append(' <link>%s</link>' % xml.sax.saxutils.escape(iurl))
result.append(' <title>%s</title>' % xml.sax.saxutils.escape(ititle))
result.append(' <pubDate>%s</pubDate>' % time.strftime("%a, %d %b %Y %H:%M:%S GMT", ituple))
result.append(' </item>')
result.append('</channel>')
result.append('</rss>')
return 200, "application/xml", None, result
def pulse(self, operations):
hostname = operations.get_userdata("hostname", "")
if not hostname: return
account = operations.get_userdata("account", "")
if not account: return
password = operations.get_userdata("password", "")
if not password: return
enabled = operations.get_userdata("enabled", "")
if not enabled: return
interval_minutes = 10
try: interval_minutes = int(os.environ.get('JAEGER_ALERTS_INTERVAL', '10'))
except: pass
last_attempt = operations.get_userdata("last_attempt", 0)
if last_attempt + interval_minutes * 60 > time.time():
return
operations.set_userdata("last_attempt", time.time())
print >> sys.stderr, "ToolGoogleAlerts"
connection = poplib.POP3(hostname)
connection.user(account)
connection.pass_(password)
numMessages = len(connection.list()[1])
for i in range(numMessages):
lines = []
for j in connection.retr(i+1)[1]:
lines.append(j)
self.process_message(operations, "\n".join(lines))
# self.process_message(os.linesep.join(lines))
# pprint.pprint(("ToolGoogleAlerts: *******", i, lines))
def process_message(self, operations, message):
#
# get the current alerts
#
all_alerts = operations.get_userdata('alerts', None)
if not all_alerts:
all_alerts = {}
operations.set_userdata('alerts', all_alerts)
verified = operations.get_userdata('verified', None)
if not verified:
verified = {}
operations.set_userdata('verified', verified)
try:
import types
import email
import email.Utils
msg = email.message_from_string(message)
frm = msg.get('From')
if frm != 'newsalerts-noreply@google.com': return
date = msg.get('Date')
if not date: return
pdate = email.Utils.parsedate(date)
if not pdate: return
tdate = time.mktime(pdate)
if (time.time() - tdate) / 3600 > (24 * 3):
return
msgid = msg.get('Message-ID')
payload = msg.get_payload()
if not type(payload) in types.StringTypes: return
subject = msg.get('Subject')
# print >> sys.stderr, "VERIFY: A", subject
if confirm_rex.match(subject):
#
# This code handles verification
#
# print >> sys.stderr, "VERIFY: A.1"
match = verify_rex.search(payload)
if match:
# print >> sys.stderr, "VERIFY: B"
url = match.group(2)
if not verified.get(url):
# print >> sys.stderr, "VERIFY: C"
operations.log("verifying '%s'" % url)
try:
f = urllib.urlopen(url)
f.read()
f.close()
verified[url] = 1
except:
print >> sys.stderr, "ToolGoogleAlerts.process_message: caught exception"
traceback.print_exc(file = sys.stderr)
# print >> sys.stderr, "VERIFY: D"
elif subject[:20] == 'Google News Alert - ':
subject_alert = subject[20:]
#
#
alert = all_alerts.get(subject_alert, ( [], {} ))
# pprint.pprint(alert)
alert_list, alert_map = alert
is_new_subject = not bool(alert_list)
## make a list of all URLs that we know about
known_urls = {}
for item in alert_list:
known_urls[item[1]] = 1
## add new stuff
for match in block_rex.finditer(payload):
iurl = match.group(3)
if known_urls.get(iurl):
continue
ititle = match.group(1)
icontents = match.group(2).strip()
alert_list.append(( tdate, iurl, ititle, {} ))
known_urls[iurl] = 1
print "ToolGoogleAlerts:", iurl, ititle
## discover the 'remove url' for unsubscribing
match = remove_rex.search(payload)
if match:
alert_map["remove"] = match.group(2)
## save updated values
all_alerts[subject_alert] = ( alert_list, alert_map )
if is_new_subject and operations.get_userdata("enabled", "autosubscribe"):
feed_url = "%salerts/feed?subject=%s" % \
( self.tool_root(full=True), urllib.quote(subject_alert) )
operations.subscribe_to(feed_url)
# pprint.pprint(("ToolGoogleAlerts", alert ))
except:
print >> sys.stderr, "ToolGoogleAlerts.process_message: caught exception"
traceback.print_exc(file = sys.stderr)
#
# Instantiate the tool when this module is imported:
# creating it will register it
#
ToolGoogleAlerts()